*************************************************
*************************************************
*COMPUTING COUSIN MARRIAGE FROM MARRIAGE RECORDS*
*************************************************
*************************************************


* Notation: 
	* f is for surname (family)
	* s is for state
	* y is for year
	* d is for decade


* Load the cleaned marriage records. At this point the data are at the
* marriage level: bs_clean is the bride's surname, gs_clean the groom's.
use "$dir/Data/Original/MR_cleaned_1750to1950.dta", clear


* Sample restrictions: marriages up to 1940 with a recorded state.
* (A missing year compares greater than any number in Stata, so records
* without a year are dropped by the first condition as well.)
keep if year<=1940
keep if state!=""


* Pre-period dummy: 1 for marriages in 1858 or earlier, 0 otherwise
gen pre = year <= 1858


* Generating decades from years: each year is assigned the first value in
* 1750(10)1950 whose ten-year window reaches it, so 1750-1759 -> 1750,
* 1760-1769 -> 1760, ... Any year before 1750 is also assigned 1750.
gen decade = .
forvalues start = 1750(10)1950 {
	replace decade = `start' if missing(decade) & year <= `start' + 9
}


* Isonymy dummy: bride and groom carry the same surname.
* Two blank surnames compare equal as strings, so a blank surname is
* explicitly excluded; otherwise a record with both names missing would be
* counted as an isonymous marriage.
gen isonymous = bs_clean == gs_clean & bs_clean != ""



******************************************************************************
 * (1) Convert dataset from marriage-level to person-level  
******************************************************************************


* Stack two copies of the marriage-level data so that every marriage
* contributes one bride row and one groom row. `surname' holds the surname
* of the person the row stands for; `bride' / `groom' flag which spouse it is.
gen surname = ""
gen bride = 0
gen groom = 0

* Untouched copy of the marriages; it becomes the groom rows after the append
tempfile groom_and_bride
save `groom_and_bride'

* The copy in memory becomes the bride rows
replace surname = bs_clean
replace bride = 1

append using `groom_and_bride'

* The appended rows are exactly those that still have bride == 0.
* Identify groom rows through that flag rather than through surname == "":
* a bride whose own surname is blank would otherwise be given the groom's
* surname and be counted as a bride under his name.
replace groom = 1 if bride == 0
replace surname = gs_clean if groom == 1

drop gs_clean bs_clean




******************************************************************************
 * (2) Convert to Surname-State-Year cells  
******************************************************************************

* Count, within each surname-state-year cell, the bride rows, the groom rows
* and the person-rows that belong to an isonymous marriage.
* decade and pre are both generated from year, so listing them in by() only
* carries them along; it does not split the cells any further.
collapse (sum) Nf_fsy = bride ///
	Nm_fsy = groom ///
	isonymous_fsy = isonymous, ///
	by(surname state year decade pre)

* Total number of person-rows (brides + grooms) in the cell
generate N_fsy = Nf_fsy + Nm_fsy

* Keep the surname-state-year dataset; it is merged back in after the
* state-decade random-isonymy calculation
tempfile surname_state_year
save `surname_state_year'


******************************************************************************
 * (3) Random Isonymy (State-decade pools)  
******************************************************************************

* Pool the years of each decade: surname-state-decade counts of brides,
* grooms and isonymous person-rows
collapse (sum) Nf_fsd = Nf_fsy ///
	Nm_fsd = Nm_fsy ///
	isonymous_fsd = isonymous_fsy, ///
	by(surname state decade)

generate N_fsd = Nf_fsd + Nm_fsd


* Size of state-decade marriage pool (all grooms / all brides in the pool)
bysort state decade: egen Nm_sd = total(Nm_fsd)
bysort state decade: egen Nf_sd = total(Nf_fsd)

* Surname share of marriage pool (fraction of grooms / brides with surname f)
generate nm_fsd = Nm_fsd / Nm_sd
generate nf_fsd = Nf_fsd / Nf_sd


* Calculate random isonymy from the groom and bride shares of the surname
generate IsoRand_fsd = 2*(nm_fsd*nf_fsd) / (nm_fsd + nf_fsd)


* Merge back with Surname-State-Year level dataset: every decade-level row
* is attached to all yearly rows of the same surname and state
merge 1:m surname state decade using `surname_state_year', nogenerate




******************************************************************************
 * (4) Observed Isonymy (Surname-State-Year)  
******************************************************************************

* Share of the cell's person-rows (brides + grooms) that belong to an
* isonymous marriage
generate IsoObs_fsy = isonymous_fsy / N_fsy


******************************************************************************
 * (5) Non-Random Isonymy (Surname-State-Year)  
******************************************************************************

* Observed isonymy in excess of the random (state-decade) benchmark, rescaled
* by one minus that benchmark. The result is missing when IsoRand_fsd equals 1.
generate IsoNonrand_fsy = (IsoObs_fsy - IsoRand_fsd) / (1 - IsoRand_fsd)


******************************************************************************
 * (6) Save Surname-State-Year level dataset   
******************************************************************************

* Variable labels for the surname-state-year file.

* Isonymy measures.
* isonymous_* are sums over person-rows, so each isonymous marriage is counted
* twice (once for the bride, once for the groom); the labels say "individuals"
* to reflect that.
label variable isonymous_fsd "Individuals in isonymous marriages (surname-state-decade)"
label variable isonymous_fsy "Individuals in isonymous marriages (surname-state-year)"
label variable IsoObs_fsy "Observed isonymy (surname-state-year)"
label variable IsoNonrand_fsy "Non-random isonymy (surname-state-year)"
label variable IsoRand_fsd "Random isonymy (surname-state-decade)"

* Surname-state-decade counts
label variable Nf_fsd "Number of brides (surname-state-decade)"
label variable Nm_fsd "Number of grooms (surname-state-decade)"
label variable N_fsd "Number of individuals (surname-state-decade)"

* Surname-state-year counts
label variable Nf_fsy "Number of brides (surname-state-year)"
label variable Nm_fsy "Number of grooms (surname-state-year)"
label variable N_fsy "Number of individuals (surname-state-year)"

* State-decade marriage pool sizes
label variable Nm_sd "Number of grooms (state-decade)"
label variable Nf_sd "Number of brides (state-decade)"

save "$dir/Data/Final/Isonymy_fsy.dta", replace

**************************************************************************************************************
 * Collapse at surname - state - prepost level
**************************************************************************************************************

* Builds the surname-state-pre/post file: weighted averages of the yearly
* isonymy measures, cousin-marriage rates derived from them, and a
* high-cousin-marriage dummy.
use "$dir/Data/Final/Isonymy_fsy.dta", clear

* Attach year_of_ban by state; states absent from either file are dropped
merge m:1 state using "$dir/Data/Original/year_of_ban.dta", keep(match) nogenerate

* banyear = 1 for years at or after the state's ban year. When year_of_ban is
* missing the comparison is false (missing is larger than any number in
* Stata), so every year of such a state has banyear = 0.
gen banyear = year >= year_of_ban

* Copies of the measure and of the cell size restricted to non-ban years
gen Isononrand_fsy_noban = IsoNonrand_fsy if banyear == 0
gen N_fsy_noban = N_fsy if banyear == 0

* Averages are weighted by the number of individuals in each yearly cell;
* (rawsum) totals ignore the weights.
collapse (mean) IsoNonrand_fs = IsoNonrand_fsy ///
		IsoRand_fs = IsoRand_fsd ///
		IsoObs_fs = IsoObs_fsy ///
		IsoNonrand_fs_noban = Isononrand_fsy_noban ///
	(rawsum) N_fs = N_fsy ///
		Nm_fs = Nm_fsy ///
		Nf_fs = Nf_fsy ///
		N_fs_noban = N_fsy_noban ///
	[fw = N_fsy], by(surname state pre)

* Cousin marriage rate: non-random isonymy floored at zero, times 4.
* max() ignores missing arguments, so max(., 0) returns 0; without the
* !missing() guards a cell with no usable observations (e.g. no non-ban years)
* would get a rate of 0 instead of missing.
gen CM_fs = max(IsoNonrand_fs, 0) * 4 if !missing(IsoNonrand_fs)
gen CM_fs_noban = max(IsoNonrand_fs_noban, 0) * 4 if !missing(IsoNonrand_fs_noban)

* High cousin marriage surname, based on pre-period surname-state specific
* rates. The dummy is 0 on all post-period rows and on rows whose rate is
* missing (a missing value would otherwise satisfy ">= 0.1").
gen cmH = CM_fs >= 0.1 & !missing(CM_fs) & pre == 1

label var IsoNonrand_fs "Non-random isonymy"
label var IsoNonrand_fs_noban "Non-random isonymy: No Ban"
label var IsoRand_fs "Random Isonymy"
label var IsoObs_fs "Observed isonymy"
label var CM_fs "Cousin marriage rate (surname-state)"
label var CM_fs_noban "Cousin marriage rate (surname-state): No Ban"
label var cmH ">10% cousin marriage rate (-1858) (surname-state)"
label var N_fs "Number of individuals (surname-state)"
label var N_fs_noban "Number of individuals (surname-state): No Ban"
label var Nm_fs "Number of grooms (surname-state)"
label var Nf_fs "Number of brides (surname-state)"

save "$dir/Data/Final/Isonymy_fs", replace


**********************************************************
*** Collapse at state-year-cmH level ***
**********************************************************

* Builds the state-year file, split by whether a surname is classified as
* high cousin marriage in the pre-period.
use "$dir/Data/Final/Isonymy_fsy.dta", clear

* Step 1: classify surnames from pre-period rows only, pooling all states
* and years, weighted by the number of individuals in each cell
keep if pre == 1

collapse (mean) IsoNonrand_f = IsoNonrand_fsy [fw = N_fsy], by(surname)
generate cm = max(IsoNonrand_f, 0) * 4
sort surname
generate cmH = cm >= 0.1
keep surname cmH

* Step 2: attach the surname flag to the full surname-state-year file.
* Surnames with no pre-period rows are kept and have a missing cmH.
merge 1:m surname using "$dir/Data/Final/Isonymy_fsy.dta", nogenerate

* Merge with state codes for maptile function
* NOTE(review): unmatched rows from either side are kept; rows whose state has
* no code would all share a missing code in the collapse below -- confirm that
* every state in the data appears in US_state_codes.dta.
merge m:1 state using "$dir/Data/Original/US_state_codes.dta", nogenerate

* renaming for maptile use: the state name moves to statename and the code
* takes over the name state (statecode is presumably supplied by
* US_state_codes.dta -- verify)
rename state statename
rename statecode state

/* Collapse at the state-year-cmH level: means weighted by cell size,
   N_sy is the unweighted total of individuals */
collapse (mean) decade = decade ///
		IsoNonrand_sy = IsoNonrand_fsy ///
		IsoObs_sy = IsoObs_fsy ///
	(firstnm) statename ///
	(rawsum) N_sy = N_fsy ///
	[aw = N_fsy], by(state year cmH)

label variable IsoNonrand_sy "Non-random isonymy"
label variable IsoObs_sy "Observed isonymy"
label variable cmH ">10% cousin marriage rate (-1858) (surname)"
label variable N_sy "Number of individuals (state-year)"
label variable state "State Code"
label variable statename "State"
label variable decade "Decade"


save "$dir/Data/Final/Isonymy_sy.dta", replace
